source("Downstream.v2.R")

# loading libraries
library(Seurat)
library(rrrSingleCellUtils)
library(ggplot2)
library(msigdbr)
library(dplyr)

Load Seurat objects

Load Seurat objects here so only have to complete once (commented out elsewhere).

Load Osteoblasts

# Create Seurat objects and perform initial QC.  Label original source.  osteoblasts
osteoblasts <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0031/outs/filtered_feature_bc_matrix/")


osteoblasts <- subset(osteoblasts, subset = nCount_RNA < 20000 & percent.mt < 10)

osteoblasts$src <- "osteoblasts"
osteoblasts$model <- "osteoblasts"

Load OS17

# OS17 Culture
os17_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0016/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os17_cx_raw <- subset(os17_cx_raw, subset = nFeature_RNA > 3500 & nCount_RNA < 50000 & percent.mt <
    15)

os17_cx_raw$src <- "Culture"
os17_cx_raw$model <- "OS-17"

## Tibia
os17_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0018xS0028/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os17_tib_raw <- subset(os17_tib_raw, subset = nFeature_RNA > 3000 & nCount_RNA < 60000 & percent.mt <
    18)

os17_tib_raw$src <- "Tibia"
os17_tib_raw$model <- "OS-17"

## Lung double check usage throughout rest of code - Emily (fixed issue where coming from
## wrong file)
os17_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home2/gdrobertslab/lab/Counts/S0024xS0029/outs/filtered_feature_bc_matrix",
    species_pattern = "^hg19-")


os17_lung_raw <- subset(os17_lung_raw, subset = nFeature_RNA > 1250 & nCount_RNA < 60000 & percent.mt <
    25)

os17_lung_raw$src <- "Lung"
os17_lung_raw$model <- "OS-17"

Process OS17 culture for Figure 1

os17_cx <- NormalizeData(os17_cx_raw) %>%
    FindVariableFeatures(selection.method = "vst") %>%
    ScaleData() %>%
    RunPCA(pc.genes = os17_cx@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 117232
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8912
## Number of communities: 8
## Elapsed time: 0 seconds

DimPlot(os17_cx, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + scale_color_npg(alpha = 0.7)


# Regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
os17_cx <- CellCycleScoring(object = os17_cx, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
os17_cx <- ScaleData(object = os17_cx, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os17_cx),
    block.size = 10000)

os17_cx <- RunPCA(os17_cx, pc.genes = os17_cx@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8908
## Number of communities: 5
## Elapsed time: 0 seconds

# Find optimal clustering resolution
os17_cx_nc <- nRes(os17_cx, res = seq(from = 0.1, to = 0.2, by = 0.05))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9457
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9302
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9152
## Number of communities: 5
## Elapsed time: 0 seconds

plot <- pSil(os17_cx_nc, 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9302
## Number of communities: 4
## Elapsed time: 0 seconds

## NULL
plot
## NULL

# With res = 0.15, cells in cluster 3 do not have any significantly different genes that are
# deferentially regulated Therefore, we decided to proceed with res = 0.1
os17_cx <- FindClusters(os17_cx, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9457
## Number of communities: 3
## Elapsed time: 0 seconds

if (!file.exists("Data")) {
    dir.create("Data")
}
save(os17_cx, file = "Data/os17_cx_CCR.RData")

Add lineage tags to metadata for Figure 5

For lineage analysis in Figure 5.

# Plot histograms of lineage tracing tags in the Seurat objects

if (!file.exists("Data/S0024xS0029.ltbc")) {
    s <- c("S0016", "S0018xS0028", "S0024xS0029")
    for (i in s) {
        gen_cellecta_bc_data(file = paste0("/gpfs0/home/gdrobertslab/lab/Counts/", i, "/outs/possorted_genome_bam.bam"),
            verbose = TRUE, output = paste0("Data/", i, ".ltbc"), samtools_module = "SAMtools")
    }

    for (i in s) {
        ltbc <- read.table(paste0("Data/", i, ".ltbc"), header = T, sep = "\t")
        if (substr(ltbc[[1, 1]], 1, 5) == "CB:Z:") {
            ltbc$cid <- substr(ltbc$cid, 6, 22)
            write.table(ltbc, paste0("Data/", i, ".ltbc"), sep = "\t")
        }
    }
}

os17_cx_raw <- process_ltbc(os17_cx_raw, cid_lt = read.table("Data/S0016.ltbc", header = T, sep = "\t"),
    histogram = F, relative = T)

os17_tib_raw <- process_ltbc(os17_tib_raw, cid_lt = read.table("Data/S0018xS0028.ltbc", sep = "\t"),
    histogram = F, relative = T)

os17_lung_raw <- process_ltbc(os17_lung_raw, cid_lt = read.table("Data/S0024xS0029.ltbc", sep = "\t"),
    histogram = F, relative = T)

Create OS17 object for lineage analysis in Figure 5

# Subset all to 2800 cells in each condition
os17_cx <- subset(os17_cx_raw, cells = sample(Cells(os17_cx_raw), 2800))
os17.tib <- subset(os17_tib_raw, cells = sample(Cells(os17_tib_raw), 2800))
os17.lung <- subset(os17_lung_raw, cells = sample(Cells(os17_lung_raw), 2800))

# Merge into a single Seurat object
os17 <- merge(os17_cx, y = c(os17.tib, os17.lung), add.cell_ids = c("Culture", "Tibia", "Lung"),
    project = "LineageTracing")

# Process and cluster
os17 <- NormalizeData(os17) %>%
    FindVariableFeatures(selection.method = "vst") %>%
    ScaleData() %>%
    RunPCA(pc.genes = os17@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8400
## Number of edges: 289959
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9048
## Number of communities: 8
## Elapsed time: 0 seconds

# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

os17 <- CellCycleScoring(object = os17, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)

os17 <- ScaleData(object = os17, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os17),
    block.size = 10000)

os17 <- RunPCA(os17, pc.genes = os17@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8400
## Number of edges: 280874
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8977
## Number of communities: 7
## Elapsed time: 0 seconds

DimPlot(os17, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS17 Clusters") +
    scale_color_npg(alpha = 0.7)


# Plot the data
set.seed(100)
cell_ids <- sample(colnames(os17))

DimPlot(os17, reduction = "umap", group.by = "src", pt.size = 1, label = F, order = cell_ids) +
    coord_fixed() + ggtitle("OS17 by Source") + scale_color_npg()


save(os17, file = "Data/os17.RData")

Load t143b

# t143b Culture
t143b_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0017/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


t143b_cx_raw <- subset(t143b_cx_raw, subset = nFeature_RNA > 4000 & nCount_RNA < 74000 & percent.mt <
    17)

t143b_cx_raw$src <- "Culture"
t143b_cx_raw$model <- "143B"

## Tibia
t143b_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0052/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


t143b_tib_raw <- subset(t143b_tib_raw, subset = nFeature_RNA > 1000 & nCount_RNA < 30000 & percent.mt <
    22)

t143b_tib_raw$src <- "Tibia"
t143b_tib_raw$model <- "143B"

# Lung
t143b_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0019-143B-lung/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


t143b_lung_raw <- subset(t143b_lung_raw, subset = nFeature_RNA > 1000 & nCount_RNA < 30000 & percent.mt <
    22)

t143b_lung_raw$src <- "Lung"
t143b_lung_raw$model <- "143B"

Load NCH-OS2

# NCHOS2 Flank
os2_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0076/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os2_cx_raw <- subset(os2_cx_raw, subset = nCount_RNA < 36000 & percent.mt < 50)

os2_cx_raw$src <- "Flank"
os2_cx_raw$model <- "NCH-OS-2"

## Tibia
os2_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0042/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os2_tib_raw <- subset(os2_tib_raw, subset = nFeature_RNA > 2500 & nCount_RNA < 40000 & percent.mt <
    20)

os2_tib_raw$src <- "Tibia"
os2_tib_raw$model <- "NCH-OS-2"

## Lung
os2_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0041/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os2_lung_raw <- subset(os2_lung_raw, subset = nFeature_RNA > 2500 & nCount_RNA < 60000 & percent.mt <
    30)

os2_lung_raw$src <- "Lung"
os2_lung_raw$model <- "NCH-OS-2"

Load NCH-OS7

# NCHOS7 Flank
os7_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0043/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os7_cx_raw <- subset(os7_cx_raw, subset = nCount_RNA < 50000 & percent.mt < 25)

os7_cx_raw$src <- "Flank"
os7_cx_raw$model <- "NCH-OS-7"

## Tibia
os7_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0034/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os7_tib_raw <- subset(os7_tib_raw, subset = nFeature_RNA > 2000 & nCount_RNA < 35000 & percent.mt <
    14)

os7_tib_raw$src <- "Tibia"
os7_tib_raw$model <- "NCH-OS-7"

## Lung
os7_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0055/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os7_lung_raw <- subset(os7_lung_raw, subset = nCount_RNA < 42000 & percent.mt < 20)

os7_lung_raw$src <- "Lung"
os7_lung_raw$model <- "NCH-OS-7"

Process NCHS-OS7 flank for Figure 1

os7_cx <- NormalizeData(os7_cx_raw) %>%
    FindVariableFeatures(selection.method = "vst") %>%
    ScaleData() %>%
    RunPCA(pc.genes = os7_cx@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 68728
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8807
## Number of communities: 6
## Elapsed time: 0 seconds

DimPlot(os7_cx, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + scale_color_npg(alpha = 0.7)


# Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

os7_cx <- CellCycleScoring(object = os7_cx, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)

os7_cx <- ScaleData(object = os7_cx, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os7_cx),
    block.size = 10000)

os7_cx <- RunPCA(os7_cx, pc.genes = os7_cx@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8676
## Number of communities: 5
## Elapsed time: 0 seconds

# Find optimal clustering resolution
os7_cx_nc <- nRes(os7_cx, res = seq(from = 0.1, to = 0.15, by = 0.01))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9248
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9195
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9152
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9123
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9095
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds

plot <- pSil(os7_cx_nc, 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds

## NULL
plot
## NULL

os7_cx <- FindClusters(os7_cx, resolution = 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds

save(os7_cx, file = "Data/os7_cx_CCR.RData")

Save Raw Objects

# Save raw objects

# Make lists for easy loop saving
sample_list <- c(osteoblasts, os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
    t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw, os7_lung_raw)

sample_names <- c("osteoblasts", "os17_cx_raw", "os17_tib_raw", "os17_lung_raw", "t143b_cx_raw",
    "t143b_tib_raw", "t143b_lung_raw", "os2_cx_raw", "os2_tib_raw", "os2_lung_raw", "os7_cx_raw",
    "os7_tib_raw", "os7_lung_raw")

# Correlate sample names and sample labels
names(sample_list) <- sample_names

# Save each raw object
for (item in sample_names) {
    # Save each sample as an individual Seurat object with proper name
    assign(item, sample_list[[item]])
    save(list = item, file = paste("Data/", item, ".RData", sep = ""))
}

Merge Objects

Merge objects, process, and cell cycle regress.

# Merge into a single Seurat object
OS <- merge(osteoblasts, y = c(os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
    t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw, os7_lung_raw),
    add.cell_ids = c("Osteoblasts", "OS-17_Culture", "OS-17_Tibia", "OS-17_Lung", "143B_Culture",
        "143B_Tibia", "143B_Lung", "NCH-OS-2_Flank", "NCH-OS-2_Tibia", "NCH-OS-2_Lung", "NCH-OS-7_Flank",
        "NCH-OS-7_Tibia", "NCH-OS-7_Lung"), project = "Heterogeneity")

# Process and cluster
OS <- NormalizeData(OS) %>%
    FindVariableFeatures(selection.method = "vst") %>%
    ScaleData() %>%
    RunPCA(pc.genes = os.17@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 54300
## Number of edges: 1853982
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9614
## Number of communities: 10
## Elapsed time: 15 seconds

# rm(osteoblasts, os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
# t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw,
# os7_lung_raw)

# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

OS <- CellCycleScoring(object = OS, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)

OS <- ScaleData(object = OS, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(OS),
    block.size = 10000)

OS <- RunPCA(OS, pc.genes = OS@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 54300
## Number of edges: 1824736
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9610
## Number of communities: 9
## Elapsed time: 14 seconds

save(OS, file = "Data/OS_merged_postCCR.RData")

OS$model <- as.factor(OS$model)
OS$model <- factor(as.factor(OS$model), levels = c("OS-17", "143B", "NCH-OS-2", "NCH-OS-7", "Osteoblasts"))

# Plot the data
DimPlot(OS, reduction = "umap", group.by = "model", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Model") +
    scale_color_npg(alpha = 1)


DimPlot(OS, reduction = "umap", group.by = "src", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Source")


DimPlot(OS, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Clusters") +
    scale_color_npg(alpha = 0.7)

List Objects and Process

List objects, process, and cell cycle regress. Used in Figure 3B - ITH code.

# Create List to compute ITH scores (subset to 1500 cells for each)
OS.list_1500 <- list(osteoblasts = osteoblasts, OS17.cx = os17_cx_raw, OS17.Tibia = os17_tib_raw,
    OS17.Lung = os17_lung_raw, t143B.cx = t143b_cx_raw, t143B.Tibia = t143b_tib_raw, t143B.Lung = t143b_lung_raw,
    OS2.Flank = os2_cx_raw, OS2.Tibia = os2_tib_raw, OS2.Lung = os2_lung_raw, OS7.Flank = os7_cx_raw,
    OS7.Tibia = os7_tib_raw, OS7.Lung = os7_lung_raw)

for (i in 1:length(OS.list_1500)) {
    OS.list_1500[[i]] <- NormalizeData(OS.list_1500[[i]]) %>%
        FindVariableFeatures(selection.method = "vst") %>%
        ScaleData() %>%
        RunPCA(pc.genes = OS.list_1500[[i]]@var.genes, npcs = 20) %>%
        RunUMAP(reduction = "pca", dims = 1:20) %>%
        FindNeighbors(reduction = "pca", dims = 1:20) %>%
        FindClusters(resolution = 0.3) %>%
        subset(cells = sample(Cells(OS.list_1500[[i]]), 1500))
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5735
## Number of edges: 201174
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8642
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 117232
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8912
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2849
## Number of edges: 103732
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8568
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4574
## Number of edges: 156586
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8373
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3363
## Number of edges: 123143
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8215
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6763
## Number of edges: 226939
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8477
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5134
## Number of edges: 181392
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8631
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6749
## Number of edges: 222345
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8705
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4285
## Number of edges: 148107
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8520
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1648
## Number of edges: 57506
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8356
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 68728
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8807
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2891
## Number of edges: 102969
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8567
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5133
## Number of edges: 177023
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8680
## Number of communities: 9
## Elapsed time: 0 seconds

# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

for (i in 1:length(OS.list_1500)) {
    OS.list_1500[[i]] <- CellCycleScoring(object = OS.list_1500[[i]], s.features = s.genes, g2m.features = g2m.genes,
        set.ident = TRUE)

    OS.list_1500[[i]] <- ScaleData(object = OS.list_1500[[i]], vars.to.regress = c("S.Score", "G2M.Score"),
        features = VariableFeatures(OS.list_1500[[i]]), block.size = 10000)  # Change to VariableFeatures()

    OS.list_1500[[i]] <- RunPCA(OS.list_1500[[i]], pc.genes = OS.list_1500[[i]]@var.genes, npcs = 20) %>%
        RunUMAP(reduction = "pca", dims = 1:20) %>%
        FindNeighbors(reduction = "pca", dims = 1:20) %>%
        FindClusters(resolution = 0.3)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 61069
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8390
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 61975
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8650
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 56704
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8091
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 56977
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7771
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 58610
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7691
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 60734
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7791
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 56073
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7987
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 52995
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8059
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 52729
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7782
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 51652
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7883
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 51919
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8649
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 55164
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8026
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 56741
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8093
## Number of communities: 4
## Elapsed time: 0 seconds

save(OS.list_1500, file = "Data/OSlist_1500_CCR.RData")

List objects, process, and cell cycle regress. Used in Figure 3 C-D code.

# Create list for usage in Figure 3
tibia_list <- list(OS17.Tibia = os17_tib_raw, t143B.Tibia = t143b_tib_raw, OS2.Tibia = os2_tib_raw,
    OS7.Tibia = os7_tib_raw)

lung_list <- list(OS17.Lung = os17_lung_raw, t143B.Lung = t143b_lung_raw, OS2.Lung = os2_lung_raw,
    OS7.Lung = os7_lung_raw)

OS_list <- list()

for (i in 1:length(tibia_list)) {
    message(i, head(paste(tibia_list[[i]]$model, "_TL", sep = ""), 1))
    # Subset number of cells in each Seurat object to the least in the group
    a <- table(tibia_list[[i]]$orig.ident)
    b <- table(lung_list[[i]]$orig.ident)
    list <- c(a, b)
    min <- min(list)

    tibia_list[[i]] <- subset(tibia_list[[i]], cells = sample(Cells(tibia_list[[i]]), min))

    lung_list[[i]] <- subset(lung_list[[i]], cells = sample(Cells(lung_list[[i]]), min))

    title <- head(paste(tibia_list[[i]]$model, "_TL", sep = ""), 1)
    title <- gsub("-", "", title)

    OS_list[[title]] <- merge(tibia_list[[i]], y = c(lung_list[[i]]), add.cell_ids = c(head(paste(tibia_list[[i]]$model,
        "_", tibia_list[[i]]$src, sep = ""), 1), head(paste(lung_list[[i]]$model, "_", lung_list[[i]]$src,
        sep = ""), 1)), project = "Heterogeneity")
}
# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

for (i in 1:length(OS_list)) {
    OS_list[[i]] <- NormalizeData(OS_list[[i]]) %>%
        FindVariableFeatures(selection.method = "vst") %>%
        ScaleData() %>%
        RunPCA(pc.genes = OS_list[[i]]@var.genes, npcs = 20) %>%
        RunUMAP(reduction = "pca", dims = 1:20) %>%
        FindNeighbors(reduction = "pca", dims = 1:20) %>%
        FindClusters(resolution = 0.3)

    OS_list[[i]] <- CellCycleScoring(object = OS_list[[i]], s.features = s.genes, g2m.features = g2m.genes,
        set.ident = TRUE)

    OS_list[[i]] <- ScaleData(object = OS_list[[i]], vars.to.regress = c("S.Score", "G2M.Score"),
        features = VariableFeatures(OS_list[[i]]), block.size = 10000)  # Change to VariableFeatures()

    OS_list[[i]] <- RunPCA(OS_list[[i]], pc.genes = OS_list[[i]]@var.genes, npcs = 20) %>%
        RunUMAP(reduction = "pca", dims = 1:20) %>%
        FindNeighbors(reduction = "pca", dims = 1:20) %>%
        FindClusters(resolution = 0.3)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5698
## Number of edges: 195555
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8596
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5698
## Number of edges: 190346
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8444
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10268
## Number of edges: 352757
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8850
## Number of communities: 7
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10268
## Number of edges: 343259
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8657
## Number of communities: 7
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3296
## Number of edges: 114815
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8568
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3296
## Number of edges: 110588
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8283
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5782
## Number of edges: 203134
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8971
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5782
## Number of edges: 201063
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8855
## Number of communities: 5
## Elapsed time: 0 seconds

save(OS_list, file = "Data/OS_list_CCR.RData")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux 8.4 (Ootpa)
## 
## Matrix products: default
## BLAS:   /gpfs0/export/apps/opt/R/4.1.0-foss-2020a/lib64/R/lib/libRblas.so
## LAPACK: /gpfs0/export/apps/opt/R/4.1.0-foss-2020a/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] cluster_2.1.4            msigdbr_7.5.1            rrrSingleCellUtils_0.5.0
##  [4] nichenetr_1.1.1          RColorBrewer_1.1-3       clusterProfiler_4.2.2   
##  [7] ggplot2_3.4.0            reshape2_1.4.4           SeuratObject_4.1.3      
## [10] Seurat_4.3.0             reticulate_1.26          Matrix_1.5-3            
## [13] dplyr_1.0.10             cowplot_1.1.1            ggsci_2.9               
## 
## loaded via a namespace (and not attached):
##   [1] scattermore_0.8        ModelMetrics_1.2.2.2   R.methodsS3_1.8.2     
##   [4] tidyr_1.2.1            bit64_4.0.5            knitr_1.41            
##   [7] R.utils_2.12.2         irlba_2.3.5.1          data.table_1.14.6     
##  [10] rpart_4.1.19           KEGGREST_1.34.0        hardhat_1.2.0         
##  [13] RCurl_1.98-1.6         doParallel_1.0.17      generics_0.1.3        
##  [16] BiocGenerics_0.40.0    RSQLite_2.2.19         shadowtext_0.1.2      
##  [19] RANN_2.6.1             proxy_0.4-27           future_1.29.0         
##  [22] DiagrammeR_1.0.9       bit_4.0.5              tzdb_0.3.0            
##  [25] enrichplot_1.14.2      spatstat.data_3.0-0    lubridate_1.9.0       
##  [28] httpuv_1.6.6           assertthat_0.2.1       viridis_0.6.2         
##  [31] gower_1.0.0            xfun_0.35              hms_1.1.2             
##  [34] jquerylib_0.1.4        babelgene_22.9         evaluate_0.18         
##  [37] promises_1.2.0.1       fansi_1.0.3            caTools_1.18.2        
##  [40] igraph_1.3.1           DBI_1.1.3              htmlwidgets_1.5.4     
##  [43] spatstat.geom_3.0-3    stats4_4.1.0           purrr_0.3.5           
##  [46] ellipsis_0.3.2         backports_1.4.1        deldir_1.0-6          
##  [49] vctrs_0.5.1            Biobase_2.54.0         ROCR_1.0-11           
##  [52] abind_1.4-5            caret_6.0-93           cachem_1.0.6          
##  [55] withr_2.5.0            ggforce_0.4.1          progressr_0.11.0      
##  [58] checkmate_2.1.0        sctransform_0.3.5      treeio_1.18.1         
##  [61] fdrtool_1.2.17         goftest_1.2-3          DOSE_3.20.1           
##  [64] ape_5.6-2              lazyeval_0.2.2         crayon_1.5.2          
##  [67] spatstat.explore_3.0-5 labeling_0.4.2         recipes_1.0.3         
##  [70] pkgconfig_2.0.3        tweenr_2.0.2           GenomeInfoDb_1.30.1   
##  [73] nlme_3.1-160           nnet_7.3-18            rlang_1.0.6           
##  [76] globals_0.16.2         lifecycle_1.0.3        miniUI_0.1.1.1        
##  [79] downloader_0.4         randomForest_4.7-1.1   polyclip_1.10-4       
##  [82] matrixStats_0.63.0     lmtest_0.9-40          aplot_0.1.9           
##  [85] zoo_1.8-11             base64enc_0.1-3        ggridges_0.5.4        
##  [88] GlobalOptions_0.1.2    png_0.1-8              viridisLite_0.4.1     
##  [91] rjson_0.2.21           bitops_1.0-7           R.oo_1.25.0           
##  [94] KernSmooth_2.23-20     visNetwork_2.1.2       pROC_1.18.0           
##  [97] Biostrings_2.62.0      blob_1.2.3             shape_1.4.6           
## [100] stringr_1.5.0          qvalue_2.26.0          parallelly_1.32.1     
## [103] spatstat.random_3.0-1  jpeg_0.1-9             readr_2.1.3           
## [106] gridGraphics_0.5-1     S4Vectors_0.32.4       scales_1.2.1          
## [109] memoise_2.0.1          magrittr_2.0.3         plyr_1.8.8            
## [112] ica_1.0-3              zlibbioc_1.40.0        compiler_4.1.0        
## [115] scatterpie_0.1.8       clue_0.3-63            fitdistrplus_1.1-8    
## [118] cli_3.4.1              XVector_0.34.0         listenv_0.8.0         
## [121] patchwork_1.1.2        pbapply_1.6-0          htmlTable_2.4.1       
## [124] formatR_1.12           Formula_1.2-4          MASS_7.3-58.1         
## [127] tidyselect_1.2.0       stringi_1.7.8          highr_0.9             
## [130] yaml_2.3.6             GOSemSim_2.20.0        latticeExtra_0.6-30   
## [133] ggrepel_0.9.2          grid_4.1.0             sass_0.4.4            
## [136] fastmatch_1.1-3        tools_4.1.0            timechange_0.1.1      
## [139] future.apply_1.10.0    parallel_4.1.0         rstudioapi_0.14       
## [142] circlize_0.4.15        foreign_0.8-83         foreach_1.5.2         
## [145] gridExtra_2.3          prodlim_2019.11.13     farver_2.1.1          
## [148] Rtsne_0.16             ggraph_2.1.0           digest_0.6.30         
## [151] shiny_1.7.3            lava_1.7.0             Rcpp_1.0.9            
## [154] later_1.3.0            RcppAnnoy_0.0.20       httr_1.4.4            
## [157] AnnotationDbi_1.56.2   ComplexHeatmap_2.10.0  colorspace_2.0-3      
## [160] tensor_1.5             IRanges_2.28.0         splines_4.1.0         
## [163] uwot_0.1.14            yulab.utils_0.0.5      tidytree_0.4.1        
## [166] spatstat.utils_3.0-1   graphlayouts_0.8.4     sp_1.5-1              
## [169] ggplotify_0.1.0        plotly_4.10.1          xtable_1.8-4          
## [172] jsonlite_1.8.4         ggtree_3.2.1           tidygraph_1.2.2       
## [175] timeDate_4021.106      ggfun_0.0.9            ipred_0.9-13          
## [178] R6_2.5.1               Hmisc_4.7-2            pillar_1.8.1          
## [181] htmltools_0.5.4        mime_0.12              glue_1.6.2            
## [184] fastmap_1.1.0          BiocParallel_1.28.3    class_7.3-20          
## [187] codetools_0.2-18       fgsea_1.20.0           utf8_1.2.2            
## [190] lattice_0.20-45        bslib_0.4.1            spatstat.sparse_3.0-0 
## [193] tibble_3.1.8           leiden_0.4.3           interp_1.1-3          
## [196] GO.db_3.14.0           limma_3.50.3           survival_3.4-0        
## [199] rmarkdown_2.18         munsell_0.5.0          e1071_1.7-12          
## [202] DO.db_2.9              GetoptLong_1.0.5       GenomeInfoDbData_1.2.7
## [205] iterators_1.0.14       gtable_0.3.1